//
// Project: Disagreement in science: Missing women



// start from an empty Stata session and pin the interpreter version
clear all
version 15.1  



//
// set locals
// These local macros are expanded in the analysis sections of this do-file.

// method of identifying female variable
// (holds the name of the gender variable)
local female "female_genderize"

// gender of author is known
// (condition used as an if-qualifier: true when female_genderize is not the system missing value)
local known_gender "female_genderize!=."





//
// AER PREVIOUS PUBS FROM ALL AEA JOURNALS
// Result: scalars a1_1 to a1_5 hold the share of AER authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/aer_data_gender.dta", clear
drop if month=="May" & year!=2019  // exclude AEA papers and proceedings
drop if year==2020  
keep if (comment | research_article)
append using "${data}/output/all_aea_data_nogender.dta"  // include observations from all AEA journals to capture seniority
keep if journal=="American Economic Review" | type=="JOURNAL ARTICLE" | type=="PAPERS" | type=="REGULAR ARTICLE" | type=="REGULAR ARTICLES" | type=="Regular papers" | type=="SHORTER PAPERS"

// generate cumulative number of publications by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id_chronological): generate previous_pubs = _n
keep if journal=="American Economic Review"  // observations from the AER
// keep one row per author: the AER observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a1_`i' = r(N)/`N'
}



//
// ASR
// Result: scalars a2_1 to a2_5 hold the share of ASR authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/asr_data_gender.dta", clear
keep if (comment | research_article)

// generate cumulative number of publications in the journal by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id): generate previous_pubs = _n
// keep one row per author: the observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a2_`i' = r(N)/`N'
}



//
// JAMA
// Result: scalars a3_1 to a3_5 hold the share of JAMA authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/jama_pubmed_data_gender.dta", clear
drop if year==2020  
drop if year<2002  // full author names from PubMed not available
keep if comment | research_article
drop if article_with_etal
drop if strpos(full_name, "Fontanarosa")  // this JAMA editor appeared as first author of letters to the editor

// generate cumulative number of publications in the journal by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id): generate previous_pubs = _n
// keep one row per author: the observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a3_`i' = r(N)/`N'
}



//
// Nature
// Result: scalars a4_1 to a4_5 hold the share of Nature authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/nature_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// generate cumulative number of publications in the journal by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id): generate previous_pubs = _n
// keep one row per author: the observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a4_`i' = r(N)/`N'
}



//
// PNAS 
// Result: scalars a5_1 to a5_5 hold the share of PNAS authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/pnas_data_gender.dta", clear
drop if full_name=="II" | full_name=="III" | full_name=="IV" | full_name=="Jr" | full_name=="Jr."  // erroneously scraped as separate author-article observations
// NOTE(review): the comment previously attached to the next line said that PNAS
// started comments in 2008, but the command only drops 2020 and no pre-2008
// filter is applied in this section - confirm whether one is intended.
drop if year==2020
keep if comment | research_article

// generate cumulative number of publications in the journal by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id): generate previous_pubs = _n
// keep one row per author: the observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a5_`i' = r(N)/`N'
}



//
// Science
// Result: scalars a6_1 to a6_5 hold the share of Science authors with known gender
// whose cumulative publication count is 1, 2, 3, 4, or 5 and more (top-coded).

// call data
use "${data}/output/science_data_gender.dta", clear
drop if year==2020
keep if comment | research_article

// generate cumulative number of publications in the journal by author (the count starts at 1, so it includes the current article)
bysort full_name (article_id): generate previous_pubs = _n
// keep one row per author: the observation with the highest cumulative count
bysort full_name: egen double max_cumul_pubs = max(previous_pubs)
bysort full_name: keep if previous_pubs==max_cumul_pubs
// top-code the count at 5
replace previous_pubs = 5 if previous_pubs>5 & previous_pubs<.

// analysis
// tab and matrix list are kept for the log and for matrix a
tab previous_pubs if `known_gender', matcell(a)
matrix list a
// The shares are computed with count rather than by reading rows of a:
// matcell has one row per value actually present, so an empty category would
// shift the rows and make a[5,1] missing.
count if (`known_gender') & previous_pubs<.
local N = r(N)
forvalues i=1/5 {
	count if previous_pubs==`i' & (`known_gender')
	scalar a6_`i' = r(N)/`N'
}

